This document will try to approach Ferrari’s historical analysis from a quantitative perspective, it is meant to be a complementary document to the case write app & excel worksheet submitted by Team-7.

Data Wrangling

# transform data into the correct format
df_tall[, c("Parameter", "Company", "Year", "Ticker", "Group")] <- lapply(df_tall[, c("Parameter", "Company", "Year", "Ticker", "Group")], as.factor)

# drop post analysis column
df_tall$Is.Leading.Indicator <- NULL
df_tall$Comparable.Class <- NULL

# show the final data format
str(df_tall)
## 'data.frame':    540 obs. of  6 variables:
##  $ Parameter: Factor w/ 12 levels "AP2Revenues",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ Company  : Factor w/ 15 levels "BMW","Burberry Group plc",..: 5 5 5 6 6 6 13 13 13 1 ...
##  $ Year     : Factor w/ 3 levels "2013","2014",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ Value    : num  0.0682 0.0604 0.043 0.0647 0.0353 ...
##  $ Ticker   : Factor w/ 15 levels "BMWYY","BRBY",..: 12 12 12 6 6 6 14 14 14 1 ...
##  $ Group    : Factor w/ 3 levels "Auto","Luxury",..: 3 3 3 1 1 1 1 1 1 1 ...

Wrangle data in preparation for the clustering analysis

# spread data around year
df <- spread(df_tall, key = Year, value = Value)

# drop company name, group , and year 2015
df$Company <- NULL
df$Group <- NULL
df$`2015` <- NULL


# split the df into a df_list around parameters while dropping the parameter column
dfl <- split(df, f = df$Parameter)
dfl <- lapply(dfl, function(x) subset(x, select = -Parameter))

summary(dfl)
##                          Length Class      Mode
## AP2Revenues              3      data.frame list
## AR2Revenues              3      data.frame list
## CapitalTurnover          3      data.frame list
## Cash2Revenues            3      data.frame list
## COGS2Revenue             3      data.frame list
## EBITMargin               3      data.frame list
## Inventories2Revenue      3      data.frame list
## NPPE2Revenue             3      data.frame list
## PrepaidExpenses2Revenues 3      data.frame list
## ROIC                     3      data.frame list
## SGA2Revenue              3      data.frame list
## WCR2Revenues             3      data.frame list
str(dfl$AP2Revenues)
## 'data.frame':    15 obs. of  3 variables:
##  $ Ticker: Factor w/ 15 levels "BMWYY","BRBY",..: 1 2 3 4 12 6 5 7 8 11 ...
##  $ 2013  : num  0.1014 0.0591 0.077 0.0473 0.2081 ...
##  $ 2014  : num  0.098 0.0748 0.0784 0.0478 0.1939 ...
# use ticker name as row column for each df in dfl & drop ticker column
for (item in 1:length(dfl)) {
  rownames(dfl[[item]]) <- dfl[[item]]$Ticker
  dfl[[item]]$Ticker <- NULL
}

head(dfl$EBITMargin, 7)
##             2013       2014
## BMWYY 0.10499744 0.11340655
## BRBY  0.21418923 0.19757060
## DDAIF 0.09166653 0.07812308
## EL    0.15334080 0.16984383
## RACE  0.15567750 0.14099104
## FCAU  0.04015143 0.03441905
## F     0.04942993 0.02918649
#build the a clusterization function
build_cluster_map <- function(dataframe, number_of_clusters) {
  optimum_cluster <- kmeans(dataframe, 
                            centers = number_of_clusters, 
                            nstart = 25)
  cluster_df <- as.data.frame(optimum_cluster$cluster)
  colnames(cluster_df)[1] <- "Cluster"
  
  return (
    list(
      cluster_table = cluster_df, 
      visualization = fviz_cluster(optimum_cluster, data = dataframe)
    )
  )
}


analyize_cluster <- function(dataframe, number_of_clusters = 3){
  k_max <- 10
  
  # Average Silhouette Width Analysis
  p1 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "silhouette") + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Average Silhouette Width") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")

  # Total Within Sum of Squares Analysis
  p2 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "wss") + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Total Within Sum of Squares") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
  
  # Gap Statistics (k) Analysis
  gap_stat <- clusGap(dataframe, FUN = kmeans, nstart = 25, K.max = k_max, B = 50)
  p3 <- fviz_gap_stat(gap_stat) + 
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Gap Statistics (k)") + 
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")

  
  # Cluster Analysis
  cluster_map <- build_cluster_map(dataframe, number_of_clusters)
  
  p4 <-  cluster_map$visualization + 
    theme_minimal() + 
    ggtitle(paste("Cluster Plot for",number_of_clusters,"Clusters"))
  
  p5 <- ggplot() + 
    theme_minimal() + 
    annotation_custom(tableGrob(cluster_map$cluster_table))

  
  ggarrange(
    ggarrange(p1, p2, p3, widths = c(1,1), ncol = 3), 
    ggarrange(p4, p5, widths = c(2,1), ncol = 2), 
    nrow = 2, labels = c("A","B"), heights = c(2,3)
    )
  
}

Analyizing Financial Ratios

Analyizing All Ratios’ Principle Components

In this section we will utilize principle components to reduce the 12 financial ratios into two dimensions, we understand that this may cause loss of information due the reduction method, but our hypothesis is that performing clustering analysis on the principle components may reveal association between Ferrari & other companies that we can’t see by analyzing individual financial ratio.

# spread data around Parameter
dfw <- pivot_wider(df_tall, names_from = Parameter, values_from = Value)

# drop company name, group , and year 2015
dfw$Company <- NULL
dfw$Group <- NULL
dfw <- subset(dfw, Year != 2015)

dfw$Year <- factor(dfw$Year, levels = c(2013, 2014))

dfw <- as.data.frame(dfw)

# split the df into a df_list around Year while dropping the Year column
dfwl <- split(dfw, f = dfw$Year)
dfwl <- lapply(dfwl, function(x) subset(x, select = -Year))

summary(dfwl)
##      Length Class      Mode
## 2013 13     data.frame list
## 2014 13     data.frame list
head(dfwl$'2013', 7)
##    Ticker       ROIC EBITMargin CapitalTurnover COGS2Revenue SGA2Revenue
## 1    RACE 0.06820394 0.15567750       0.7065015    0.4129685  0.11128478
## 3    FCAU 0.06465148 0.04015143       1.2272602    0.8641429  0.07825624
## 5    TSLA 0.04775863 0.04978902       0.9846389    0.7733981  0.14182745
## 7   BMWYY 0.04247918 0.10499744       0.5958915    0.7992611  0.09541277
## 9   VLKAF 0.02738411 0.05924155       0.6431643    0.8175395  0.13927424
## 11  DDAIF 0.05921053 0.09166653       0.7434841    0.7876201  0.11978946
## 13     TM 0.02805701 0.06465752       0.7075484    0.8448528  0.09528149
##    NPPE2Revenue Cash2Revenues AR2Revenues Inventories2Revenue
## 1     0.2431410     0.3415023  0.45763445          0.10170130
## 3     0.2748492     0.2334911  0.03009582          0.12158997
## 5     0.5567029     0.4201096  0.02438992          0.16903684
## 7     0.5401333     0.1739439  0.31488713          0.12615207
## 9     0.3281508     0.1942114  0.05651068          0.14544153
## 11    0.4232764     0.1394535  0.06613721          0.14704785
## 13    0.3105139     0.1482338  0.32130427          0.07776337
##    PrepaidExpenses2Revenues WCR2Revenues AP2Revenues
## 1                0.00371263     2.084257  0.20807016
## 3                0.00522891     1.479959  0.20356087
## 5                0.01369459     1.875021  0.15096578
## 7                0.00000000     1.020534  0.10139497
## 9                0.00000000     1.030070  0.09148913
## 11               0.00250886     1.191734  0.07701175
## 13               0.02388639     1.067560  0.09580129
# build row name & scale all values
for (item in 1:length(dfwl)) {
  rownames(dfwl[[item]]) <- dfwl[[item]]$Ticker
  # use ticker name as row column for each df in dfl & drop ticker column
  dfwl[[item]]$Ticker <- NULL
  for (column in 1:length(dfwl[[item]])) {
    dfwl[[item]][[column]] <- scale(dfwl[[item]][[column]])
  }
}

head(dfwl$'2013', 7)
##             ROIC EBITMargin CapitalTurnover COGS2Revenue SGA2Revenue
## RACE  -0.2608454  0.3336409      -0.4813249   -0.6379818  -0.6645583
## FCAU  -0.3091011 -1.0662105       0.8919971    1.0838551  -0.8402640
## TSLA  -0.5385692 -0.9494300       0.2521669    0.7375417  -0.5020769
## BMWYY -0.6102839 -0.2804590      -0.7730206    0.8362438  -0.7489944
## VLKAF -0.8153315 -0.8348918      -0.6483551    0.9060004  -0.5156595
## DDAIF -0.3830095 -0.4419922      -0.3837961    0.7918179  -0.6193149
## TM    -0.8061910 -0.7692655      -0.4785639    1.0102375  -0.7496928
##       NPPE2Revenue Cash2Revenues AR2Revenues Inventories2Revenue
## RACE  -0.515177778     1.5429326   2.5433476         -0.68965509
## FCAU  -0.274048925     0.2983106  -0.7795861         -0.35624757
## TSLA   1.869347126     2.4487315  -0.8239337          0.43913708
## BMWYY  1.743341656    -0.3878563   1.4338809         -0.27976994
## VLKAF  0.131290655    -0.1543127  -0.5742834          0.04359260
## DDAIF  0.854687022    -0.7852925  -0.4994637          0.07052045
## TM    -0.002831248    -0.6841156   1.4837565         -1.09094314
##       PrepaidExpenses2Revenues WCR2Revenues AP2Revenues
## RACE                -0.3978348    1.1257034   1.9987086
## FCAU                -0.2826572   -0.1465036   1.9041269
## TSLA                 0.3604017    0.6852055   0.8009516
## BMWYY               -0.6798487   -1.1137156  -0.2387898
## VLKAF               -0.6798487   -1.0936413  -0.4465635
## DDAIF               -0.4892740   -0.7532951  -0.7502247
## TM                   1.1345778   -1.0147139  -0.3561165

2013 Cluster Analysis

df_to_analyze <- dfwl$'2013'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

2014 Cluster Analysis

df_to_analyze <- dfwl$'2014'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 7)

The two graphs above highlights that RACE is creating its unique clusters for both 2013/2014, yet we can notice the adjacency of RACE to the the cluster of PPRUY, LRCY, LVMUY and the cluster of HMC, BMWYY, VLKAF, DDAIF, TM. we will try to uncover that relationship in the next section by limiting the parameters of the principle components analysis.

Analyizing key Ratios’ Principle Components

In this section we will rerun the principle components analysis on ROIC, EBIT Margin, and Capital Turnover parameters only.

# build a limited data frame

dfwll <- dfwl


# build row name & scale all values
for (item in 1:length(dfwll)) {
  
  dfwll[[item]] <- select(dfwll[[item]], 
                          -c(COGS2Revenue,
                             SGA2Revenue, 
                             NPPE2Revenue, 
                             Cash2Revenues, 
                             AR2Revenues, 
                             Inventories2Revenue, 
                             PrepaidExpenses2Revenues, 
                             WCR2Revenues, 
                             AP2Revenues
                             )
                          )
}


head(dfwll$'2013', 7)
##             ROIC EBITMargin CapitalTurnover
## RACE  -0.2608454  0.3336409      -0.4813249
## FCAU  -0.3091011 -1.0662105       0.8919971
## TSLA  -0.5385692 -0.9494300       0.2521669
## BMWYY -0.6102839 -0.2804590      -0.7730206
## VLKAF -0.8153315 -0.8348918      -0.6483551
## DDAIF -0.3830095 -0.4419922      -0.3837961
## TM    -0.8061910 -0.7692655      -0.4785639

2013 Cluster Analysis

df_to_analyze <- dfwll$'2013'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

2014 Cluster Analysis

df_to_analyze <- dfwll$'2014'

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

The analysis above suggests that principle components cauterization over ROIC, EBIT Margin, and Capital Turnover suggests that RACE are in the same cluster of LVMUY, PPRUY, LRLCY with proximity to the cluster of BMWYY, VLKAF, DDAIF, TM, HMC.

Analyizing Indvidual Ratio

Account Payable to Revenue

df_to_analyze <- dfl$AP2Revenues

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)

Account Receivable to Revenue

df_to_analyze <- dfl$AR2Revenues

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 3)

Capital Turnover

df_to_analyze <- dfl$CapitalTurnover

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

Cash to Revenue

df_to_analyze <- dfl$Cash2Revenues

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)

EBITDA Margin

df_to_analyze <- dfl$EBITMargin

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

Inventory to Revenue

df_to_analyze <- dfl$Inventories2Revenue

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

NPPE to Revenue

df_to_analyze <- dfl$NPPE2Revenue

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)

Prepaid Expenses to Revenue

df_to_analyze <- dfl$PrepaidExpenses2Revenues

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

ROIC

df_to_analyze <- dfl$ROIC

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

SG&A to Revenue

df_to_analyze <- dfl$SGA2Revenue

# identify the optimum cluster size
analyize_cluster(df_to_analyze,6 )

WCR to Revenue

df_to_analyze <- dfl$WCR2Revenues

# identify the optimum cluster size
analyize_cluster(df_to_analyze, 3)